# Differential expression on Kallisto data 

# Preliminary samples - 2016 dataset

# Packages and dependence
packageCheckClassic <- function(x){
  # 
  for( i in x ){
    if( ! require( i , character.only = TRUE ) ){
      install.packages( i , dependencies = TRUE )
      require( i , character.only = TRUE )
    }
  }
}

packageCheckClassic(c('goseq','DESeq2','adegenet','vsn','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: goseq
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
## 
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
## 
##     score
## The following object is masked from 'package:BiocGenerics':
## 
##     score
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## Loading required package: vsn
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.17'
##   is available with R version '4.3'; see https://bioconductor.org/install
## 
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
## 
##     install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
## 
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
## 
##     rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: vegan
## Loading required package: permute
## 
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
## 
##     check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
## 
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
## 
##     plotMA
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
#BiocManager::install("goseq")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
##   Use `force = TRUE` to force installation
install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis") 
## Skipping install of 'pairwiseAdonis' from a github remote, the SHA1 (cb190f76) has not changed since last install.
##   Use `force = TRUE` to force installation
library("pairwiseAdonis")
## Loading required package: cluster
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library('BiocManager')
library('goseq')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
#candidateGenes<-read.csv('candidateGenes.csv',header=T,sep=',')
samplesSaccharina<-read.table('saccharinaDesignMulti.txt',header=T)
samplesHedophylum<-read.table('hedophylumDesignMulti.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/kelpProject/kallistoOutput'
outputPath<-'/Users/mmeynadier/Documents/kelpProject/DESeq2Output'
#setwd(dataPath)

# DDS object

# If data from kallisto
tx2geneSaccharina<-read.table('Saccharina_tx2gene',header=T)
tx2geneHedophylum<-read.table('Hedophylum_tx2gene',header=T)


# 
# # Data importation - txImport
setwd(dataPath)
filesSaccharina<-paste0(samplesSaccharina$sample)
txiSaccharina<-tximport(files = filesSaccharina,type='kallisto',tx2gene = tx2geneSaccharina)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 
## transcripts missing from tx2gene: 8998
## summarizing abundance
## summarizing counts
## summarizing length
filesHedophylum<-paste0(samplesHedophylum$sample)
txiHedophylum<-tximport(files = filesHedophylum,type='kallisto',tx2gene = tx2geneHedophylum)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 
## transcripts missing from tx2gene: 9442
## summarizing abundance
## summarizing counts
## summarizing length
ddsSaccharina<-DESeqDataSetFromTximport(txiSaccharina,colData=samplesSaccharina,design= ~treatment + mesocosm)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
ddsHedophylum<-DESeqDataSetFromTximport(txiHedophylum,colData=samplesHedophylum,design= ~treatment + mesocosm)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
# pre-filtering
keepSaccharina <- rowSums(counts(ddsSaccharina)) >= 10 
ddsSaccharina <- ddsSaccharina[keepSaccharina,]
keepHedophylum <- rowSums(counts(ddsHedophylum)) >= 10 
ddsHedophylum <- ddsHedophylum[keepHedophylum,]

# Differential expression analysis
ddsSaccharina<-DESeq(ddsSaccharina)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsSaccharina))
##      [,1]               
## [1,] "Intercept"        
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
## [5,] "mesocosm_M1_vs_M0"
## [6,] "mesocosm_M2_vs_M0"
ddsHedophylum<-DESeq(ddsHedophylum)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsHedophylum))
##      [,1]               
## [1,] "Intercept"        
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
## [5,] "mesocosm_M1_vs_M0"
## [6,] "mesocosm_M2_vs_M0"
# Exploring the results - Saccharina

S_C_vs_T1 <- results(ddsSaccharina,contrast=c("treatment", "T1", "C"))
S_C_vs_T1_trimmed <- subset(S_C_vs_T1, padj < 0.05)
S_C_vs_T1_trimmed <- subset(S_C_vs_T1_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_C_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T2", "C"))
S_C_vs_T2_trimmed <- subset(S_C_vs_T2, padj < 0.05)
S_C_vs_T2_trimmed <- subset(S_C_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_C_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "C"))
S_C_vs_T3_trimmed <- subset(S_C_vs_T3, padj < 0.05)
S_C_vs_T3_trimmed <- subset(S_C_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T1_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T2", "T1"))
S_T1_vs_T2_trimmed <- subset(S_T1_vs_T2, padj < 0.05)
S_T1_vs_T2_trimmed <- subset(S_T1_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T1_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "T1"))
S_T1_vs_T3_trimmed <- subset(S_T1_vs_T3, padj < 0.05)
S_T1_vs_T3_trimmed <- subset(S_T1_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T2_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "T2"))
S_T2_vs_T3_trimmed <- subset(S_T2_vs_T3, padj < 0.05)
S_T2_vs_T3_trimmed <- subset(S_T2_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)

DESeq2::plotMA(S_C_vs_T1_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(S_C_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(S_C_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(S_T1_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(S_T1_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(S_T2_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdSaccharina <- vst(ddsSaccharina, blind=T)

meanSdPlot(assay(vsdSaccharina))

ntd <- normTransform(ddsSaccharina)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsSaccharina,normalized=TRUE)),
                decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsSaccharina)[,c("treatment","mesocosm")])
pheatmap(assay(vsdSaccharina)[select,], cluster_rows=FALSE, show_rownames=F,
         cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdSaccharina, intgroup=c("treatment", "mesocosm"), returnData=TRUE) 
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment, shape=mesocosm)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  coord_fixed() + theme_classic() 

sampleDists <- dist(t(assay(vsdSaccharina)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsdSaccharina$treatment, vsdSaccharina$mesocosm, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

count_tab_assay <- assay(vsdSaccharina)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad <- adonis2(data=samplesSaccharina,dist_tab_assay ~ treatment + mesocosm, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ treatment + mesocosm, data = samplesSaccharina, method = "euclidian")
##           Df SumOfSqs      R2      F Pr(>F)
## treatment  3    97266 0.39825 0.9256  0.649
## mesocosm   2    76913 0.31492 1.0979  0.321
## Residual   2    70053 0.28683              
## Total      7   244232 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSaccharina$treatment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##      pairs Df SumsOfSqs   F.Model        R2   p.value p.adjusted sig
## 1  C vs T1  1  33853.90 0.9221985 0.2351229 0.6000000          1    
## 2  C vs T2  1  34406.16 0.9984946 0.3329986 0.7500000          1    
## 3  C vs T3  1  41524.33 1.1779746 0.2819487 0.3000000          1    
## 4 T1 vs T2  1  23732.10 0.5758274 0.3654127 1.0000000          1    
## 5 T1 vs T3  1  26776.56 0.6861412 0.2554375 1.0000000          1    
## 6 T2 vs T3  1  28436.44 0.7719787 0.4356591 0.6666667          1
mod <- betadisper(dist_tab_assay,samplesSaccharina$treatment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##              diff        lwr        upr     p adj
## T1-C    -7.942638  -29.28590   13.40062 0.5063862
## T2-C  -151.493863 -178.49119 -124.49654 0.0001005
## T3-C   -15.781330  -37.12459    5.56193 0.1233041
## T2-T1 -143.551225 -172.18621 -114.91624 0.0001395
## T3-T1   -7.838692  -31.21906   15.54168 0.5764828
## T3-T2  135.712533  107.07754  164.34752 0.0001650
mod <- betadisper(dist_tab_assay,samplesSaccharina$mesocosm)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##             diff       lwr       upr     p adj
## M1-M0 156.184407  31.57115 280.79766 0.0216549
## M2-M0 149.923883  21.22387 278.62390 0.0287003
## M2-M1  -6.260524 -91.38758  78.86653 0.9691174
ordered_S_C_vs_T1<- S_C_vs_T1_trimmed[order(S_C_vs_T1_trimmed$padj),]
ordered_S_C_vs_T2<- S_C_vs_T2_trimmed[order(S_C_vs_T2_trimmed$padj),]
ordered_S_C_vs_T3<- S_C_vs_T3_trimmed[order(S_C_vs_T3_trimmed$padj),]
ordered_S_T1_vs_T2<- S_T1_vs_T2_trimmed[order(S_T1_vs_T2_trimmed$padj),]
ordered_S_T1_vs_T3<- S_T1_vs_T3_trimmed[order(S_T1_vs_T3_trimmed$padj),]
ordered_S_T2_vs_T3<- S_T2_vs_T3_trimmed[order(S_T2_vs_T3_trimmed$padj),]

write.csv(ordered_S_C_vs_T1, file = '../DESeq2/Saccharina/S_C_VS_T1.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T1, file =
## "../DESeq2/Saccharina/S_C_VS_T1.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_C_vs_T2, file = '../DESeq2/Saccharina/S_C_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T2, file =
## "../DESeq2/Saccharina/S_C_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_C_vs_T3, file = '../DESeq2/Saccharina/S_C_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T3, file =
## "../DESeq2/Saccharina/S_C_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T2, file = '../DESeq2/Saccharina/S_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T2, file =
## "../DESeq2/Saccharina/S_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T2, file ='../DESeq2/Saccharina/S_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T2, file =
## "../DESeq2/Saccharina/S_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T3, file ='../DESeq2/Saccharina/S_T1_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T3, file =
## "../DESeq2/Saccharina/S_T1_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T2_vs_T3, file ='../DESeq2/Saccharina/S_T2_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_T2_vs_T3, file =
## "../DESeq2/Saccharina/S_T2_VS_T3.csv", : attempt to set 'sep' ignored
# Hedophylum

H_C_vs_T1 <- results(ddsHedophylum,contrast=c("treatment", "T1", "C"))
H_C_vs_T1_trimmed <- subset(H_C_vs_T1, padj < 0.05)
H_C_vs_T1_trimmed <- subset(H_C_vs_T1_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_C_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T2", "C"))
H_C_vs_T2_trimmed <- subset(H_C_vs_T2, padj < 0.05)
H_C_vs_T2_trimmed <- subset(H_C_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_C_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "C"))
H_C_vs_T3_trimmed <- subset(H_C_vs_T3, padj < 0.05)
H_C_vs_T3_trimmed <- subset(H_C_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T1_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T2", "T1"))
H_T1_vs_T2_trimmed <- subset(H_T1_vs_T2, padj < 0.05)
H_T1_vs_T2_trimmed <- subset(H_T1_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T1_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "T1"))
H_T1_vs_T3_trimmed <- subset(H_T1_vs_T3, padj < 0.05)
H_T1_vs_T3_trimmed <- subset(H_T1_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T2_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "T2"))
H_T2_vs_T3_trimmed <- subset(H_T2_vs_T3, padj < 0.05)
H_T2_vs_T3_trimmed <- subset(H_T2_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)

DESeq2::plotMA(H_C_vs_T1_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(H_C_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(H_C_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(H_T1_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(H_T1_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(H_T2_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdHedophylum <- vst(ddsHedophylum, blind=T)

meanSdPlot(assay(vsdHedophylum))

ntd <- normTransform(ddsHedophylum)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsHedophylum,normalized=TRUE)),
                decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsHedophylum)[,c("treatment","mesocosm")])
pheatmap(assay(vsdHedophylum)[select,], cluster_rows=FALSE, show_rownames=F,
         cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdHedophylum, intgroup=c("treatment", "mesocosm"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment, shape=mesocosm)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed() + theme_classic()

sampleDists <- dist(t(assay(vsdHedophylum)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsdHedophylum$treatment, vsdHedophylum$mesocosm, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

count_tab_assay <- assay(vsdHedophylum)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad <- adonis2(data=samplesHedophylum,dist_tab_assay ~ treatment + mesocosm, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ treatment + mesocosm, data = samplesHedophylum, method = "euclidian")
##           Df SumOfSqs      R2      F Pr(>F)
## treatment  3    74595 0.51481 1.2364  0.121
## mesocosm   2    30082 0.20761 0.7479  0.934
## Residual   2    40220 0.27758              
## Total      7   144896 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesHedophylum$treatment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##      pairs Df SumsOfSqs   F.Model        R2   p.value p.adjusted sig
## 1  C vs T1  1  25257.52 1.5996828 0.4443955 0.3333333          1    
## 2  C vs T2  1  33516.99 2.0688708 0.5084631 0.3333333          1    
## 3  C vs T3  1  28645.67 1.3980340 0.4114244 0.3333333          1    
## 4 T1 vs T2  1  21663.63 1.4776619 0.4249010 0.3333333          1    
## 5 T1 vs T3  1  17078.13 0.9012158 0.3106338 1.0000000          1    
## 6 T2 vs T3  1  23027.39 1.1893305 0.3729091 0.3333333          1
mod <- betadisper(dist_tab_assay,samplesHedophylum$treatment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##            diff       lwr       upr p adj
## T1-C  -8.675808 -8.675808 -8.675808     0
## T2-C  -6.272203 -6.272203 -6.272203     0
## T3-C  15.661914 15.661914 15.661914     0
## T2-T1  2.403605  2.403605  2.403605     0
## T3-T1 24.337722 24.337722 24.337722     0
## T3-T2 21.934117 21.934117 21.934117     0
mod <- betadisper(dist_tab_assay,samplesHedophylum$mesocosm)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##             diff        lwr      upr     p adj
## M1-M0 -32.369342 -84.333467 19.59478 0.2008276
## M2-M0   3.426461 -41.575791 48.42871 0.9669503
## M2-M1  35.795803  -9.206449 80.79806 0.1046991
ordered_H_C_vs_T1<- H_C_vs_T1_trimmed[order(H_C_vs_T1_trimmed$padj),]
ordered_H_C_vs_T2<- H_C_vs_T2_trimmed[order(H_C_vs_T2_trimmed$padj),]
ordered_H_C_vs_T3<- H_C_vs_T3_trimmed[order(H_C_vs_T3_trimmed$padj),]
ordered_H_T1_vs_T2<- H_T1_vs_T2_trimmed[order(H_T1_vs_T2_trimmed$padj),]
ordered_H_T1_vs_T3<- H_T1_vs_T3_trimmed[order(H_T1_vs_T3_trimmed$padj),]
ordered_H_T2_vs_T3<- H_T2_vs_T3_trimmed[order(H_T2_vs_T3_trimmed$padj),]

write.csv(ordered_H_C_vs_T1, file = '../DESeq2/Hedophylum/H_C_VS_T1.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T1, file =
## "../DESeq2/Hedophylum/H_C_VS_T1.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_C_vs_T2, file = '../DESeq2/Hedophylum/H_C_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T2, file =
## "../DESeq2/Hedophylum/H_C_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_C_vs_T3, file = '../DESeq2/Hedophylum/H_C_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T3, file =
## "../DESeq2/Hedophylum/H_C_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T2, file = '../DESeq2/Hedophylum/H_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T2, file =
## "../DESeq2/Hedophylum/H_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T2, file ='../DESeq2/Hedophylum/H_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T2, file =
## "../DESeq2/Hedophylum/H_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T3, file ='../DESeq2/Hedophylum/H_T1_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T3, file =
## "../DESeq2/Hedophylum/H_T1_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T2_vs_T3, file ='../DESeq2/Hedophylum/H_T2_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_T2_vs_T3, file =
## "../DESeq2/Hedophylum/H_T2_VS_T3.csv", : attempt to set 'sep' ignored
heatmap_genes <- function(candidateGenes,vsd,species,genesType,colNames) {
listGenes <- candidateGenes$genes
listGenes2 <- which(rownames(vsd) %in% listGenes)
index <- which(listGenes %in% rownames(vsd))
candidateGenes2 <- candidateGenes[index, ] 
listProt <- candidateGenes2$pfam_annotation
listGenes3 <- candidateGenes2$genes

vsdCandidate <- vsd[listGenes3, ]

colnames(vsdCandidate) <- colNames
rownames(vsdCandidate) <- listProt

topVarGenesVsd <- head(order(rowVars(assay(vsdCandidate)), decreasing=TRUE), 50 )

heatmap.2(assay(vsdCandidate)[topVarGenesVsd,], trace="none",scale="row",keysize=1.15,key.xlab = "",
          key.title = "",
          col=colorRampPalette(rev(brewer.pal(11,"PuOr")))(255), cexRow=0.6, cexCol=0.7,density.info="none",
          ylab="PFAM annotation of expressed genes",Colv = F,margins = c(6, 7))
main=paste("Expression level of genes related to",genesType,"in",species,sep= " ")
title(main, cex.main = 0.9)
}

stressListSaccharina<-read.csv('../DESeq2/Saccharina/stressList.csv',header=T,sep=',')
metaboListSaccharina<-read.csv('../DESeq2/Saccharina/metaboList.csv',header=T,sep=',')
transportListSaccharina<-read.csv('../DESeq2/Saccharina/transportList.csv',header=T,sep=',')
photoListSaccharina<-read.csv('../DESeq2/Saccharina/photoList.csv',header=T,sep=',')
signalingListSaccharina<-read.csv('../DESeq2/Saccharina/signalingList.csv',header=T,sep=',')
geneticListSaccharina<-read.csv('../DESeq2/Saccharina/geneticList.csv',header=T,sep=',')
cytoskeletonListSaccharina<-read.csv('../DESeq2/Saccharina/cytoskeletonList.csv',header=T,sep=',')

colnamesSaccharina <- c('C_M0','C_M1','C_M2','T1_M1','T1_M2','T2_M1','T3_M1','T3_M2')

heatmap_genes(cytoskeletonListSaccharina,vsdSaccharina,'Saccharina','cytoskeleton',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(geneticListSaccharina,vsdSaccharina,'Saccharina','transcription / translation',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(metaboListSaccharina,vsdSaccharina,'Saccharina','metabolism',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(signalingListSaccharina,vsdSaccharina,'Saccharina','signaling',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(transportListSaccharina,vsdSaccharina,'Saccharina','transport',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(stressListSaccharina,vsdSaccharina,'Saccharina','stress',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(photoListSaccharina,vsdSaccharina,'Saccharina','respiration',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

stressListHedophylum<-read.csv('../DESeq2/Hedophylum/stressList.csv',header=T,sep=',')
metaboListHedophylum<-read.csv('../DESeq2/Hedophylum/metaboList.csv',header=T,sep=',')
transportListHedophylum<-read.csv('../DESeq2/Hedophylum/transportList.csv',header=T,sep=',')
photoListHedophylum<-read.csv('../DESeq2/Hedophylum/photoList.csv',header=T,sep=',')
signalingListHedophylum<-read.csv('../DESeq2/Hedophylum/signalingList.csv',header=T,sep=',')
geneticListHedophylum<-read.csv('../DESeq2/Hedophylum/geneticList.csv',header=T,sep=',')
cytoskeletonListHedophylum<-read.csv('../DESeq2/Hedophylum/cytoskeletonList.csv',header=T,sep=',')

colnamesHedophylum <- c('C_M0','C_M2','T1_M1','T1_M2','T2_M1','T2_M2','T3_M0','T3_M2')

heatmap_genes(cytoskeletonListHedophylum,vsdHedophylum,'Hedophylum','cytoskeleton',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(geneticListHedophylum,vsdHedophylum,'Hedophylum','transcription / translation',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(metaboListHedophylum,vsdHedophylum,'Hedophylum','metabolism',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(signalingListHedophylum,vsdHedophylum,'Hedophylum','signaling',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(transportListHedophylum,vsdHedophylum,'Hedophylum','transport',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(stressListHedophylum,vsdHedophylum,'Hedophylum','stress',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(photoListHedophylum,vsdHedophylum,'Hedophylum','respiration',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] EnhancedVolcano_1.12.0      ashr_2.2-54                
##  [3] apeglm_1.16.0               tximport_1.22.0            
##  [5] ggvenn_0.1.10               pairwiseAdonis_0.4.1       
##  [7] cluster_2.1.4               limma_3.50.3               
##  [9] dplyr_1.1.2                 vegan_2.6-4                
## [11] lattice_0.21-8              permute_0.9-7              
## [13] gplots_3.1.3                genefilter_1.76.0          
## [15] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [17] markdown_1.7                ggrepel_0.9.3              
## [19] ggplot2_3.4.2               BiocManager_1.30.20        
## [21] devtools_2.4.5              usethis_2.1.6              
## [23] vsn_3.62.0                  adegenet_2.1.10            
## [25] ade4_1.7-22                 DESeq2_1.34.0              
## [27] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [29] MatrixGenerics_1.6.0        matrixStats_0.63.0         
## [31] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
## [33] IRanges_2.28.0              S4Vectors_0.32.4           
## [35] BiocGenerics_0.40.0         goseq_1.46.0               
## [37] geneLenDataBase_1.30.0      BiasedUrn_2.0.9            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3               tidyselect_1.2.0         RSQLite_2.3.1           
##   [4] AnnotationDbi_1.56.2     htmlwidgets_1.6.2        BiocParallel_1.28.3     
##   [7] munsell_0.5.0            preprocessCore_1.56.0    miniUI_0.1.1.1          
##  [10] withr_2.5.0              colorspace_2.1-0         filelock_1.0.2          
##  [13] ggalt_0.4.0              knitr_1.43               rstudioapi_0.14         
##  [16] Rttf2pt1_1.3.12          labeling_0.4.2           bbmle_1.0.25            
##  [19] GenomeInfoDbData_1.2.7   mixsqp_0.3-48            farver_2.1.1            
##  [22] bit64_4.0.5              coda_0.19-4              vctrs_0.6.2             
##  [25] generics_0.1.3           xfun_0.39                BiocFileCache_2.2.1     
##  [28] R6_2.5.1                 ggbeeswarm_0.7.2         invgamma_1.1            
##  [31] locfit_1.5-9.7           bitops_1.0-7             cachem_1.0.8            
##  [34] DelayedArray_0.20.0      vroom_1.6.3              promises_1.2.0.1        
##  [37] BiocIO_1.4.0             scales_1.2.1             beeswarm_0.4.0          
##  [40] gtable_0.3.3             ash_1.0-15               affy_1.72.0             
##  [43] processx_3.8.1           rlang_1.1.1              splines_4.1.2           
##  [46] extrafontdb_1.0          rtracklayer_1.54.0       hexbin_1.28.3           
##  [49] yaml_2.3.7               reshape2_1.4.4           GenomicFeatures_1.46.5  
##  [52] httpuv_1.6.11            extrafont_0.19           tools_4.1.2             
##  [55] affyio_1.64.0            ellipsis_0.3.2           jquerylib_0.1.4         
##  [58] sessioninfo_1.2.2        Rcpp_1.0.10              plyr_1.8.8              
##  [61] progress_1.2.2           zlibbioc_1.40.0          purrr_1.0.1             
##  [64] RCurl_1.98-1.12          ps_1.7.5                 prettyunits_1.1.1       
##  [67] urlchecker_1.0.1         fs_1.6.2                 magrittr_2.0.3          
##  [70] truncnorm_1.0-9          mvtnorm_1.1-3            SQUAREM_2021.1          
##  [73] pkgload_1.3.2            hms_1.1.3                mime_0.12               
##  [76] evaluate_0.21            xtable_1.8-4             XML_3.99-0.14           
##  [79] emdbook_1.3.12           compiler_4.1.2           biomaRt_2.50.3          
##  [82] bdsmatrix_1.3-6          maps_3.4.1               tibble_3.2.1            
##  [85] KernSmooth_2.23-21       crayon_1.5.2             htmltools_0.5.5         
##  [88] tzdb_0.4.0               mgcv_1.8-42              later_1.3.1             
##  [91] geneplotter_1.72.0       DBI_1.1.3                proj4_1.0-12            
##  [94] dbplyr_2.3.2             MASS_7.3-60              rappdirs_0.3.3          
##  [97] readr_2.1.4              Matrix_1.5-1             cli_3.6.1               
## [100] parallel_4.1.2           igraph_1.4.2             pkgconfig_2.0.3         
## [103] GenomicAlignments_1.30.0 numDeriv_2016.8-1.1      xml2_1.3.4              
## [106] annotate_1.72.0          vipor_0.4.5              bslib_0.4.2             
## [109] XVector_0.34.0           stringr_1.5.0            callr_3.7.3             
## [112] digest_0.6.31            Biostrings_2.62.0        rmarkdown_2.21          
## [115] restfulr_0.0.15          curl_5.0.0               shiny_1.7.4             
## [118] Rsamtools_2.10.0         gtools_3.9.4             rjson_0.2.21            
## [121] lifecycle_1.0.3          nlme_3.1-162             jsonlite_1.8.4          
## [124] seqinr_4.2-30            fansi_1.0.4              pillar_1.9.0            
## [127] ggrastr_1.0.1            KEGGREST_1.34.0          fastmap_1.1.1           
## [130] httr_1.4.6               pkgbuild_1.4.0           survival_3.5-5          
## [133] GO.db_3.14.0             glue_1.6.2               remotes_2.4.2           
## [136] png_0.1-8                bit_4.0.5                stringi_1.7.12          
## [139] sass_0.4.6               profvis_0.3.8            blob_1.2.4              
## [142] caTools_1.18.2           memoise_2.0.1            irlba_2.3.5.1           
## [145] ape_5.7-1